
  ___  ____  ____  ____  ____ ®
 /__    /   ____/   /   ____/      18.0
___/   /   /___/   /   /___/       MP—Parallel Edition

 Statistics and Data Science       Copyright 1985-2023 StataCorp LLC
                                   StataCorp
                                   4905 Lakeway Drive
                                   College Station, Texas 77845 USA
                                   800-STATA-PC        https://www.stata.com
                                   979-696-4600        stata@stata.com

Stata license: 22-user 8-core network, expiring 30 Jun 2025
Serial number: 501809309427
  Licensed to: The University of Chicago Booth School of Business
               Mercury Computing Cluster

Notes:
      1. Stata is running in batch mode.
      2. Unicode is supported; see help unicode_advice.
      3. More than 2 billion observations are allowed; see help obs_advice.
      4. Maximum number of variables is set to 5,000 but can be increased;
          see help set_maxvar.

. do "/project/fagoolsb/git/service_industries/replication_package/builds/1_mai
> n_build/1_2_main_build.do" 

. * Author: Joe Tatarka
. * Name: 2_main_build.do
. * Last Updated Date: April 15, 2025
. * Purpose: Take data spend and visits merged data from 1_merge_visits_and_spe
> nd.ipynb and clean 
. 
. * Set Global File Paths
. global raw_root = "/project/fagoolsb/service_industries/replication_package/d
> atasets/raw"

. global intermediate_root = "/project/fagoolsb/service_industries/replication_
> package/datasets/intermediate/1_main_build"

. global save_root = "/project/fagoolsb/service_industries/replication_package/
> datasets/built"

. 
. *****************************************************************************
> ***
. *** Part 1: Load in things to merge 
. *****************************************************************************
> ***
. 
. ************************************
. *** 1.1 Bring in inflation deflators 
. *************************************
. **** Limited-Service Deflators
. tempfile cpi_limited

. import excel "${raw_root}/cpi/limited_cpi_nsa_feb_2025.xlsx", clear firstrow 
> cellrange(A12:D349)
(4 vars, 337 obs)

. 
. rename *, lower

. drop seriesid 

. rename value cpi_limited

. drop if substr(period, 1,1) == "S"
(12 observations deleted)

. 
. replace period = substr(period,2,2)
(325 real changes made)

. destring period, replace 
period: all characters numeric; replaced as byte

. 
. gen year_month_gs = ym(year, period)

. format year_month_gs %tm

. 
. gen base = cpi_limited if year_month_gs == monthly("Jan2019", "MY")
(324 missing values generated)

. egen base_jan_2019 = max(base)

. 
. replace cpi_limited = 100*(cpi_limited/base_jan_2019)
(325 real changes made)

. 
. keep if year_month_gs >= monthly("jan2019", "MY") & year_month_gs <= monthly(
> "dec2022", "MY")
(277 observations deleted)

. 
. keep cpi_limited year_month_gs

. 
. save `cpi_limited', replace
(file /scratch/jtatarka/14534719/St1324894.000001 not found)
file /scratch/jtatarka/14534719/St1324894.000001 saved as .dta format

. *****************************************************************************
> ***
. **** Part 2: Bring in Main Data and Merge 
. *****************************************************************************
> ***
. **********************************
. **** 2.1 Load in main data and do initial cleaning 
. **********************************
. * import delimited "${intermediate_root}/7225_visits_and_spend_merge", rowran
> ge(1:10000) clear
. import delimited "${intermediate_root}/7225_visits_and_spend_merge", clear 
(encoding automatically selected: ISO-8859-1)
(51 vars, 34,738,180 obs)

. 
. *drop if no spend
. drop if raw_total_spend == .
(20,551,293 observations deleted)

. 
. *** Keep only 50 states and DC,
. * drop guam, american samoa, virgin islands, and puerto rico 
. tostring poi_cbg, replace format(%17.0g)
poi_cbg was double now str12

. replace poi_cbg = "0" + poi_cbg if strlen(poi_cbg) < 12
(2,960,211 real changes made)

. 
. gen state_fips_code = substr(poi_cbg,1,2)

. 
. drop if state_fips_code == "66" | state_fips_code == "60" | state_fips_code =
> = "78" | state_fips_code == "72"
(382 observations deleted)

. 
. * Keep branded limited service restaurants (so drop both full service and non
> -branded)
. * we do this because branded limited service restaurants have better spend co
> verage
. drop if naics_code == 722511 | brands == ""
(7,923,671 observations deleted)

. 
. compress brands
  variable brands was str44 now str37
  (43,839,838 bytes saved)

. 
. **** Drop any zeroes or wacky negatives (this drops almost entirely observati
> ons with emp == 0)
. rename dwell_240_plus emp 

. rename raw_total_spend nominal_spend

. gen visits = dwell_less_5 + dwell_5_to_10 + dwell_11_to_20 + dwell_21_to_60 +
>  dwell_61_to_120 + dwell_121_to_240

. 
. drop if emp <= 0 
(521,950 observations deleted)

. drop if nominal_spend <= 0
(1 observation deleted)

. drop if visits <= 0
(21,798 observations deleted)

. 
. ***** Drop any POI with only single observation (need multiple obs per POI to
>  use POI fixed effects)
. bys placekey: egen count = count(placekey)

. drop if count == 1
(4,659 observations deleted)

. drop count

. 
. *** Turn year_month_gs variable into a numeric format
. rename year_month_gs year_month_prelim

. gen year_month_gs = monthly(year_month_prelim, "YM")

. format year_month_gs %tm

. label variable year_month_gs "Year-month"

. 
. **********************************
. **** 2.2 Merge in Deflator  
. **********************************
. merge m:1 year_month_gs using `cpi_limited', keep(1 3) nogen

    Result                      Number of obs
    -----------------------------------------
    Not matched                             0
    Matched                         5,714,426  
    -----------------------------------------

. 
. **********************************
. **** 2.3 Rename, clean up, and create variables  
. **********************************
. * Deflate spend
. gen spend = (100/cpi_limited)*nominal_spend 

. 
. *** label variables 
. label var spend "Spend, Jan 2019 $"

. label var nominal_spend "Raw Spend"

. label var emp "Employees"

. label var visits "Visits"

. 
. *** Create productivity measures 
. gen ln_spend_prod = ln(spend/emp)

. label var ln_spend_prod "ln(Spend Productivity)"

. gen ln_visits_prod = ln(visits/emp)

. label var ln_visits_prod "ln(Visits Productivity)"

. gen ln_spend = ln(spend)

. label var ln_spend "ln(Spend)"

. gen ln_emp = ln(emp)

. label var ln_emp "ln(Emp)"

. gen ln_visits = ln(visits)

. label var ln_visits "ln(Visits)"

. 
. * gen avg dwell time 
. * multiply middle of time bucket by share of visits from that bucket
. gen avg_dwell_time = (2.5*dwell_less_5 + 7.5*dwell_5_to_10 + 15*dwell_11_to_2
> 0 + 40*dwell_21_to_60 + 90*dwell_61_to_120 + 180*dwell_121_to_240)/visits

. label var avg_dwell_time "Avg Dwell Time"

. 
. * dwell shares
. gen dwell_0_to_10_share = (dwell_5_to_10 + dwell_less_5)/visits

. label variable dwell_0_to_10_share "0-10 min Share"

. 
. foreach var in "dwell_11_to_20" "dwell_21_to_60" {
  2.         gen `var'_share = `var'/visits
  3. } 

. label var dwell_11_to_20_share "11-20 min Share"

. label var dwell_21_to_60_share "21-60 min Share"

. 
. gen dwell_61_to_240_share = (dwell_61_to_120 + dwell_121_to_240)/visits

. label var dwell_61_to_240_share "61-240 min Share"

. 
. order placekey year_month_gs naics_code spend emp visits, first

. 
. **** Only keep important variables for replication 
. keep placekey year_month_gs naics_code spend emp visits nominal_spend ln_spen
> d ln_emp ln_visits ln_spend_prod ln_visits_prod avg_dwell_time dwell_0_to_10_
> share dwell_11_to_20_share dwell_21_to_60_share dwell_61_to_240_share brands

. 
. sort placekey year_month_gs

. 
. **********************************
. **** 2.4 Save the dataset 
. **********************************
. save "${save_root}/main_build.dta", replace
file
    /project/fagoolsb/service_industries/replication_package/datasets/built/m
    > ain_build.dta saved

. 
end of do-file


. 